*************************************************
*			01_eia923_netgen					*
*												*
*	Clean generation data from EIA923			*
*												*
*	Raw: 	EIA 906, 920, 923					*
*	Output: netgen_data.dta						*
*												*
*************************************************


* Run 00_data_prep to define $last_year


* Cleaning Program based on Aldy Gerarden Sweeny 
cap program drop fix_id_vars
program define fix_id_vars
  qui{
    cap rename *, lower
	forval i=1/5{
	cap rename *_* **
    }
	
	*facilityid
	cap ren plantid facilityid
  
	*facilityname
	cap ren plantname facilityname 
  
    *state
	cap ren plantstate state
  
    *netgen
	foreach var of varlist netgen* {
	local vname = substr("`var'",1,9)
	cap ren `var' `vname'
	}
  }
end



* Loop To Import all Years
forval i = 2001/ $last_year {
	
  ***
  * Set locals for importing from Excel
  ***

  
  if `i'<2008{
    local file "eia_f906920/f906920_`i'/f906920_`i'.xls"
    local start = "A8" 
  }
  else if `i' == 2021 {
    local file = "eia_f923/f923_`i'/EIA923_Schedules_2_3_4_5_M_12_2021_Final.xlsx"
	local start = "A6"
	}
	
  else if `i' == 2019 {
    local file = "eia_f923/f923_`i'/EIA923_Schedules_2_3_4_5_M_12_2019_Final_Revision.xlsx"
	local start = "A6"
	}
  
  else if `i' >= 2014 |`i'==2012 {
    local file "eia_f923/f923_`i'/EIA923_Schedules_2_3_4_5_M_12_`i'_Final_Revision.xlsx"
    local start = "A6"
  }

  else if `i' == 2008 {
    local file "eia_f923/f923_`i'/eia923December2008.xls"
	local start = "A8"
  }
  
  else if `i' == 2009{
    local file "eia_f923/f923_`i'/EIA923 SCHEDULES 2_3_4_5 M Final 2009 REVISED 05252011.xls"
    local start = "A8"
  }
  
  else if `i' == 2010{
    local file "eia_f923/f923_`i'/EIA923 SCHEDULES 2_3_4_5 Final 2010.xls"
    local start = "A8"
  }
  
  else {
    local file "eia_f923/f923_`i'/EIA923_Schedules_2_3_4_5_`i'_Final_Revision.xlsx"
    local start = "A6"
  }
  

  
  ***
  * Import each year's data
  ***  

  import excel "${wind}/data_raw/`file'", ///
		sheet("Page 1 Generation and Fuel Data") firstrow cellrange(`start') clear
	
  * Fix ID Variables
  fix_id_vars
  
  * Keep Real Wind Facilities
  keep if facilityid!=99999 & reportedfueltype=="WND"
  
  * Keep Variables
  keep facilityid facilityname operatorname operatorid ///
		state censusregion nercregion eiasectornumber sectorname netgen* year
		
  * Destring numeric variables		
  qui destring operatorid netgen*, replace
  
  * Save tempfile by year
  tempfile netgen`i'
  save "`netgen`i''"
  di "`i''"
  }

***
* Append all yearly net generation data
***  
  
clear
forval i = 2001/ $last_year  {
	append using "`netgen`i''"
}
  

* Remove Duplicate
drop if facilityid==61047 & netgenera==0  
drop netgenera

* Correct 2020 Error
foreach v of varlist *netgen* {
	bys facilityid year: egen temp = sum(`v')
	bys facilityid year: replace `v' = temp if facilityid==2022 & year==2020
	drop temp
}

* Deduplicate
duplicates drop 
duplicates tag facilityid year, gen(tag)
assert tag==0
drop tag  
  
* Reshape to Faciltty-Month Level
ren (*jan *feb *mar *apr *may *jun *jul *aug *sep *oct *nov *dec) (*1 *2 *3 *4 *5 *6 *7 *8 *9 *10 *11 *12)  
reshape long netgen , i(facilityid year) j(month) 


***
* Create Variables of Interest
***


* Remove Leading Zeros Before First Month of Production
count
bys facilityid (year month): gen n = _n 
count if inlist(netgen,0,.) & n==1
local x = `r(N)' 
while `x'!=0 {
  drop if inlist(netgen,0,.) & n==1
  bys facilityid (year month): replace n = _n 
  count  if inlist(netgen,0,.) & n==1
  local x = `r(N)'
}


* Create Month of Reported Production
gen num = (year-2001)*12 + month
bys facilityid: egen min = min(num)
gen count = num-min +1

count
drop n num min

/* Create first and last period 
by facilityid: egen min_count = min(count)
by facilityid: egen max_count = max(count)
*/

* Create vintage
bys facilityid (year): gen vintage = year[1]
drop if vintage==2001

* Cleaned Date
gen date = year*100+month*1
tostring date, replace
gen newdate = date(date+"01", "YMD")
replace newdate = mofd(newdate)
format newdate %tm
drop date

rename newdate date
 
* Balance Panel for firms that exit
drop if vintage==2001 

bys facilityid: egen m = min(date)
tsset facilityid date
tsfill, full
bys facilityid (m): drop if date<m[1]
sort date

* Clean up new tsfill obs
bys facilityid (date): gen count2  =_n 
assert count==count2 if count!=.
gen tsfill_flag = count==.
replace count = count2 if count==.
replace year = yofd(dofm(date)) if year==.
replace month = month(dofm(date)) if month==.
replace netgen = 0 if netgen==.

drop m count2

* Recreate annual net gen by year of opperation
gen year_count 	= floor((count-1)/12) + 1
egen netgen_annual = sum(netgen), by(facilityid year_count)


* Standardize time invariant variables to first value
foreach v in censusregion eiasectornumber  vintage  nercregion  operatorid operatorname sectorname state {
	bys facilityid (year month): replace `v' = `v'[1]
}

* Label
label var month "Month"
label var date "Date (YM)"
order date, after(month)

label var netgen "Monthly Net Gereation"
label var count "Month of Production"
label var vintage "First Year of Production"
label var tsfill_flag "Flag for Unreported Month in Raw Data"

* Save
save "${wind}/temp/netgen_data.dta"  , replace


